#### PREAMBLE ####

#load our packages
library(foreign)
library(plyr)
library(tidyverse)
library(olsrr)
library(nnet)
library(reshape2)
library(broom)
library(DescTools)
library(rms)
library(lmtest)
library(stargazer)
library(ggplot2)
library(readr)
library(readxl)
library(plm)
library(foreach)
library(parallel)
library(readstata13)
library(ggeffects)
library(estimatr)
library(margins)
library(MASS)
library(forecast)
library(geosphere)

#### LOAD DATASETS AND MERGE ####
df1 <- read.csv("Source data/municipal_crime_data_final.csv", na.strings="", stringsAsFactors = F)
df1.1 <- read.csv("Source data/municipal_crime_data_final_new_meth.csv", na.strings="", stringsAsFactors = F)

#merging old and new methodology crime datasets
df1.1 <- df1.1[df1.1$ANO<=2019,]
df1 <- df1[df1$ANO<=2014,]
df <- rbind(df1, df1.1)
rm(df1.1, df1)
gc()

df$INEGI <- sprintf("%05d", df$INEGI)
df$INEGI <- as.character(df$INEGI)
df$time <- as.Date(df$time)

#editing column names
colnames(df)
colnames(df)[1] <- "Year"
colnames(df)[5] <- "State"
colnames(df)[6] <- "Municipality"

#load municipality coordinates data
coords <- read.csv("Source data/municipality_coords.csv", stringsAsFactors = F, na.strings = "")
coords$code <- sprintf("%05d", coords$code)

#combining datasets
df <- left_join(df, coords, by=c("INEGI"="code"))
rm(coords)
gc()

#### ADDING VARIABLE TREATMENT TIMES ####

times <- read.csv("Source data/municipality_treatment_times.csv", stringsAsFactors = F, na.strings = "")

#create Mun_State
df$Mun_State <- paste(df$Municipality, df$State, sep="_")

#document date
df$trade <- 0
for(i in times$Mun_State){
  date <- times$document.date[times$Mun_State==i]
  df$trade[df$Mun_State==i & df$time>=date] <- 1
}

#US export date
df$ustrade <- 0
for(i in times$Mun_State){
  date <- times$est_us_date[times$Mun_State==i]
  df$ustrade[df$Mun_State==i & df$time>=date] <- 1
}

length(unique(df$INEGI[df$trade==1]))
length(unique(df$INEGI[df$ustrade==1]))

#code treat_group
df <- df %>%
  group_by(INEGI) %>%
  mutate(trade_mean = mean(trade, na.rm = TRUE)) %>%
  ungroup()

unique(df$trade_mean)
df$treat_group <- NA_character_
df$treat_group[df$trade_mean==0] <- "Control"
df$treat_group[df$trade_mean==1] <- "Alwayson"
df$treat_group[df$trade_mean!=0 & df$trade_mean!=1] <- "Turnson"


#### ADDING ADDITIONAL OUTCOME VARIABLES ####

df1 <- read.csv("Source data/IDM_nov2023.csv", na.strings="", stringsAsFactors = F)
df2 <- read.csv("Source data/IDM_NM_feb24.csv", na.strings="", stringsAsFactors = F)

df1$ANO <- as.numeric(df1$ANO)
df2 <- df2[!df2$Ano %in% c("Ano"),]
df2$Ano <- as.numeric(df2$Ano)
df2 <- df2[df2$Ano<=2019,]
df1 <- df1[df1$ANO<=2014,]

df1$INEGI <- sprintf("%05d", df1$INEGI)
df1$INEGI <- as.character(df1$INEGI)
colnames(df2)[4] <- "INEGI"
df2$INEGI <- as.numeric(df2$INEGI)
df2$INEGI <- sprintf("%05d", df2$INEGI)
df2$INEGI <- as.character(df2$INEGI)

#add these types of theft

df1 <- df1 %>% 
  filter(MODALIDAD=="ROBO COMUN") %>% 
  filter(TIPO=="SIN VIOLENCIA") %>% 
  filter(SUBTIPO %in% c("A CASA HABITACION", "A NEGOCIO", "A TRANSPORTISTAS", "A TRANSEUNTES"))

df2 <- df2 %>% 
  filter(Tipo.de.delito=="Robo") %>% 
  filter(Subtipo.de.delito %in% c("Robo a casa habitaci\xf3n", "Robo a negocio", "Robo a transportista", "Robo a transe\xfante en via p\xfablica", "Robo a transe\xfante en espacio abierto al p\xfablico")) %>% 
  filter(Modalidad=="Sin violencia")

colnames(df1)
df2 <- df2[,c("Ano", "INEGI", "Entidad", "Municipio", "Tipo.de.delito", "Modalidad", "Subtipo.de.delito",
              "Enero", "Febrero", "Marzo", "Abril", "Mayo", "Junio", "Julio", "Agosto", "Septiembre", "Octubre", "Noviembre", "Diciembre")]
colnames(df2) <- colnames(df1)
df2$ANO <- as.numeric(df2$ANO)
df2$SUBTIPO[df2$SUBTIPO=="Robo a casa habitaci\xf3n"] <- "A CASA HABITACION"
df2$SUBTIPO[df2$SUBTIPO=="Robo a negocio"] <- "A NEGOCIO"
df2$SUBTIPO[df2$SUBTIPO=="Robo a transportista"] <- "A TRANSPORTISTAS"
df2$SUBTIPO[df2$SUBTIPO=="Robo a transe\xfante en via p\xfablica"] <- "A TRANSEUNTES"
df2$SUBTIPO[df2$SUBTIPO=="Robo a transe\xfante en espacio abierto al p\xfablico"] <- "A TRANSEUNTES"

colnames(df1)[8:19] <- c(1:12)
colnames(df2)[8:19] <- c(1:12)

#df1
#transpose to long time
long_df <- df1 %>%
  pivot_longer(
    cols = as.character(c(1:12)),  # Select month columns
    names_to = "MONTH",  # This will be the name of the new column for months
    values_to = "COUNT",  # This will be the name of the new column for the number of robberies
  )
#now transpose again, wide
wider_df <- long_df %>%
  pivot_wider(
    names_from = SUBTIPO,   # The column whose unique values will become new column names
    values_from = COUNT,    # The values to fill in the new columns
  )
df1 <- wider_df


#df2
#transpose to long time
long_df <- df2 %>%
  pivot_longer(
    cols = as.character(c(1:12)),  # Select month columns
    names_to = "MONTH",  # This will be the name of the new column for months
    values_to = "COUNT",  # This will be the name of the new column for the number of robberies
  )

class(long_df$COUNT)
long_df$COUNT <- as.numeric(long_df$COUNT)
#combine A TRANSEUNTES rows for df2
long_df_sums <- long_df %>% 
  filter(SUBTIPO=="A TRANSEUNTES") %>% 
  group_by(ANO, INEGI, MONTH) %>% 
  mutate(COUNT = sum(COUNT, na.rm=T)) %>% 
  distinct(ANO, INEGI, MONTH, .keep_all = T)
long_df <- long_df %>% 
  filter(SUBTIPO!="A TRANSEUNTES")

long_df <- rbind(long_df, long_df_sums)
rm(long_df_sums)
gc()

#now transpose again, wide
wider_df <- long_df %>%
  pivot_wider(
    names_from = SUBTIPO,   # The column whose unique values will become new column names
    values_from = COUNT,    # The values to fill in the new columns
  )
df2 <- wider_df

rm(long_df, wider_df)
gc()

colnames(df1)
df1 <- df1[,c("ANO", "INEGI", "MONTH", "A CASA HABITACION", "A NEGOCIO", "A TRANSPORTISTAS", "A TRANSEUNTES")]
df2 <- df2[,c("ANO", "INEGI", "MONTH", "A CASA HABITACION", "A NEGOCIO", "A TRANSPORTISTAS", "A TRANSEUNTES")]
new_vars <- rbind(df1, df2)
rm(df1, df2)
gc()

#create time variable and left join to df
colnames(new_vars)
colnames(new_vars)[4] <- "home_robbery"
colnames(new_vars)[5] <- "business_robbery"
colnames(new_vars)[6] <- "transport_robbery"
colnames(new_vars)[7] <- "personal_robbery"

new_vars$MONTH <- as.numeric(new_vars$MONTH)
df$id <- paste0(df$Year, df$month, df$INEGI)
new_vars$id <- paste0(new_vars$ANO, new_vars$MONTH, new_vars$INEGI)
new_vars <- new_vars[new_vars$id %in% df$id,]
t <- as.data.frame(table(new_vars$id))
df <- left_join(df, new_vars, by=c("Year"="ANO", "month"="MONTH", "INEGI"="INEGI"))
rm(new_vars)
gc()

#
#### CALCULATING MIN DISTANCE ####
#min distance from each treatment municipality to a control municipality, and vice-versa

#first distance calculation
lon1 <- df$long[1]
lat1 <- df$lat[1]
lon2 <- df$long[255]
lat2 <- df$lat[255]

dist_mi <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119 #meters converted to kilometers then to miles
#it's correct!

#creating time_group variable
for(i in unique(df$INEGI)){
  temp <- df[df$INEGI==i,]
  df$time_group[df$INEGI==i] <- min(temp$Year, na.rm=T)
}

length(unique(df$INEGI[df$time_group==2019]))
length(unique(df$INEGI[df$time_group==2018]))
length(unique(df$INEGI[df$time_group==2017]))
length(unique(df$INEGI[df$time_group==2016]))
length(unique(df$INEGI[df$time_group==2015]))
length(unique(df$INEGI[df$time_group==2014]))
length(unique(df$INEGI[df$time_group==2013]))
length(unique(df$INEGI[df$time_group==2012]))
length(unique(df$INEGI[df$time_group==2011]))

#Breaking df into eight datasets. two for each unbalanced panel group
for(i in 2011:2019){
  
  #treated municipalities
  temp <- df[(df$treat_group=="Turnson" | df$treat_group=="Alwayson") & df$time_group==i,]
  temp <- temp %>% 
    distinct(INEGI, .keep_all = TRUE)
  assign(paste("dft", i, sep="_"), temp)
  
  #control municipalities
  temp <- df[(df$treat_group=="Control") & df$time_group==i,]
  temp <- temp %>% 
    distinct(INEGI, .keep_all = TRUE)
  assign(paste("dfc", i, sep="_"), temp)
}

#remove datasets with 0 observations or no partner
rm(dfc_2016, dfc_2019, dft_2012, dft_2013, dft_2015, dft_2016, dft_2017, dft_2018, dft_2019) #no observations
rm(dfc_2012, dfc_2013, dfc_2015, dfc_2016, dfc_2017, dfc_2018, dfc_2019) # no partner

#forloop distance equation to come up with a minimum distance to the border for each municipality

#for treated municipalities
#dft_2011
for(i in unique(dft_2011$INEGI)){ #foreach treated postal code
  dist <- c()
  for(j in unique(dfc_2011$INEGI)){ #for each control postal code
    lon1 <- dft_2011$long[dft_2011$INEGI==i]
    lat1 <- dft_2011$lat[dft_2011$INEGI==i]
    lon2 <- dfc_2011$long[dfc_2011$INEGI==j]
    lat2 <- dfc_2011$lat[dfc_2011$INEGI==j]
    dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
    dist <- append(dist, dist_temp, after=length(dist))
  }
  #calculate minimum distance from the treated postal code to the control postal code
  dft_2011$min_dist[dft_2011$INEGI==i] <- min(dist, na.rm=T)
}
# #dft_2012
# for(i in unique(dft_2012$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2012$INEGI)){ #for each control postal code
#     lon1 <- dft_2012$long[dft_2012$INEGI==i]
#     lat1 <- dft_2012$lat[dft_2012$INEGI==i]
#     lon2 <- dfc_2012$long[dfc_2012$INEGI==j]
#     lat2 <- dfc_2012$lat[dfc_2012$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2012$min_dist[dft_2012$INEGI==i] <- min(dist, na.rm=T)
# }
# #dft_2013
# for(i in unique(dft_2013$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2013$INEGI)){ #for each control postal code
#     lon1 <- dft_2013$long[dft_2013$INEGI==i]
#     lat1 <- dft_2013$lat[dft_2013$INEGI==i]
#     lon2 <- dfc_2013$long[dfc_2013$INEGI==j]
#     lat2 <- dfc_2013$lat[dfc_2013$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2013$min_dist[dft_2013$INEGI==i] <- min(dist, na.rm=T)
# }
#dft_2014
for(i in unique(dft_2014$INEGI)){ #foreach treated postal code
  dist <- c()
  for(j in unique(dfc_2014$INEGI)){ #for each control postal code
    lon1 <- dft_2014$long[dft_2014$INEGI==i]
    lat1 <- dft_2014$lat[dft_2014$INEGI==i]
    lon2 <- dfc_2014$long[dfc_2014$INEGI==j]
    lat2 <- dfc_2014$lat[dfc_2014$INEGI==j]
    dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
    dist <- append(dist, dist_temp, after=length(dist))
  }
  #calculate minimum distance from the treated postal code to the control postal code
  dft_2014$min_dist[dft_2014$INEGI==i] <- min(dist, na.rm=T)
}
# #dft_2015
# for(i in unique(dft_2015$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2015$INEGI)){ #for each control postal code
#     lon1 <- dft_2015$long[dft_2015$INEGI==i]
#     lat1 <- dft_2015$lat[dft_2015$INEGI==i]
#     lon2 <- dfc_2015$long[dfc_2015$INEGI==j]
#     lat2 <- dfc_2015$lat[dfc_2015$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2015$min_dist[dft_2015$INEGI==i] <- min(dist, na.rm=T)
# }
# #dft_2016
# for(i in unique(dft_2016$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2016$INEGI)){ #for each control postal code
#     lon1 <- dft_2016$long[dft_2016$INEGI==i]
#     lat1 <- dft_2016$lat[dft_2016$INEGI==i]
#     lon2 <- dfc_2016$long[dfc_2016$INEGI==j]
#     lat2 <- dfc_2016$lat[dfc_2016$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2016$min_dist[dft_2016$INEGI==i] <- min(dist, na.rm=T)
# }
# #dft_2017
# for(i in unique(dft_2017$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2017$INEGI)){ #for each control postal code
#     lon1 <- dft_2017$long[dft_2017$INEGI==i]
#     lat1 <- dft_2017$lat[dft_2017$INEGI==i]
#     lon2 <- dfc_2017$long[dfc_2017$INEGI==j]
#     lat2 <- dfc_2017$lat[dfc_2017$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2017$min_dist[dft_2017$INEGI==i] <- min(dist, na.rm=T)
# }
# #dft_2018
# for(i in unique(dft_2018$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2018$INEGI)){ #for each control postal code
#     lon1 <- dft_2018$long[dft_2018$INEGI==i]
#     lat1 <- dft_2018$lat[dft_2018$INEGI==i]
#     lon2 <- dfc_2018$long[dfc_2018$INEGI==j]
#     lat2 <- dfc_2018$lat[dfc_2018$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2018$min_dist[dft_2018$INEGI==i] <- min(dist, na.rm=T)
# }
# #dft_2019
# for(i in unique(dft_2019$INEGI)){ #foreach treated postal code
#   dist <- c()
#   for(j in unique(dfc_2019$INEGI)){ #for each control postal code
#     lon1 <- dft_2019$long[dft_2019$INEGI==i]
#     lat1 <- dft_2019$lat[dft_2019$INEGI==i]
#     lon2 <- dfc_2019$long[dfc_2019$INEGI==j]
#     lat2 <- dfc_2019$lat[dfc_2019$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the treated postal code to the control postal code
#   dft_2019$min_dist[dft_2019$INEGI==i] <- min(dist, na.rm=T)
# }

#for control municipalities
#dfc_2011
for(i in unique(dfc_2011$INEGI)){ #foreach control postal code
  dist <- c()
  for(j in unique(dft_2011$INEGI)){ #for each treated postal code
    lon1 <- dfc_2011$long[dfc_2011$INEGI==i]
    lat1 <- dfc_2011$lat[dfc_2011$INEGI==i]
    lon2 <- dft_2011$long[dft_2011$INEGI==j]
    lat2 <- dft_2011$lat[dft_2011$INEGI==j]
    dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
    dist <- append(dist, dist_temp, after=length(dist))
  }
  #calculate minimum distance from the control postal code to the treated postal code
  dfc_2011$min_dist[dfc_2011$INEGI==i] <- min(dist, na.rm=T)
}
# #dfc_2012
# for(i in unique(dfc_2012$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2012$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2012$long[dfc_2012$INEGI==i]
#     lat1 <- dfc_2012$lat[dfc_2012$INEGI==i]
#     lon2 <- dft_2012$long[dft_2012$INEGI==j]
#     lat2 <- dft_2012$lat[dft_2012$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2012$min_dist[dfc_2012$INEGI==i] <- min(dist, na.rm=T)
# }
# #dfc_2013
# for(i in unique(dfc_2013$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2013$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2013$long[dfc_2013$INEGI==i]
#     lat1 <- dfc_2013$lat[dfc_2013$INEGI==i]
#     lon2 <- dft_2013$long[dft_2013$INEGI==j]
#     lat2 <- dft_2013$lat[dft_2013$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2013$min_dist[dfc_2013$INEGI==i] <- min(dist, na.rm=T)
# }
#dfc_2014
for(i in unique(dfc_2014$INEGI)){ #foreach control postal code
  dist <- c()
  for(j in unique(dft_2014$INEGI)){ #for each treated postal code
    lon1 <- dfc_2014$long[dfc_2014$INEGI==i]
    lat1 <- dfc_2014$lat[dfc_2014$INEGI==i]
    lon2 <- dft_2014$long[dft_2014$INEGI==j]
    lat2 <- dft_2014$lat[dft_2014$INEGI==j]
    dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
    dist <- append(dist, dist_temp, after=length(dist))
  }
  #calculate minimum distance from the control postal code to the treated postal code
  dfc_2014$min_dist[dfc_2014$INEGI==i] <- min(dist, na.rm=T)
}
# #dfc_2015
# for(i in unique(dfc_2015$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2015$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2015$long[dfc_2015$INEGI==i]
#     lat1 <- dfc_2015$lat[dfc_2015$INEGI==i]
#     lon2 <- dft_2015$long[dft_2015$INEGI==j]
#     lat2 <- dft_2015$lat[dft_2015$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2015$min_dist[dfc_2015$INEGI==i] <- min(dist, na.rm=T)
# }
# #dfc_2016
# for(i in unique(dfc_2016$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2016$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2016$long[dfc_2016$INEGI==i]
#     lat1 <- dfc_2016$lat[dfc_2016$INEGI==i]
#     lon2 <- dft_2016$long[dft_2016$INEGI==j]
#     lat2 <- dft_2016$lat[dft_2016$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2016$min_dist[dfc_2016$INEGI==i] <- min(dist, na.rm=T)
# }
# #dfc_2017
# for(i in unique(dfc_2017$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2017$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2017$long[dfc_2017$INEGI==i]
#     lat1 <- dfc_2017$lat[dfc_2017$INEGI==i]
#     lon2 <- dft_2017$long[dft_2017$INEGI==j]
#     lat2 <- dft_2017$lat[dft_2017$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2017$min_dist[dfc_2017$INEGI==i] <- min(dist, na.rm=T)
# }
# #dfc_2018
# for(i in unique(dfc_2018$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2018$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2018$long[dfc_2018$INEGI==i]
#     lat1 <- dfc_2018$lat[dfc_2018$INEGI==i]
#     lon2 <- dft_2018$long[dft_2018$INEGI==j]
#     lat2 <- dft_2018$lat[dft_2018$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2018$min_dist[dfc_2018$INEGI==i] <- min(dist, na.rm=T)
# }
# #dfc_2019
# for(i in unique(dfc_2019$INEGI)){ #foreach control postal code
#   dist <- c()
#   for(j in unique(dft_2019$INEGI)){ #for each treated postal code
#     lon1 <- dfc_2019$long[dfc_2019$INEGI==i]
#     lat1 <- dfc_2019$lat[dfc_2019$INEGI==i]
#     lon2 <- dft_2019$long[dft_2019$INEGI==j]
#     lat2 <- dft_2019$lat[dft_2019$INEGI==j]
#     dist_temp <- (distHaversine(c(lon1, lat1), c(lon2, lat2))/1000)*0.62137119
#     dist <- append(dist, dist_temp, after=length(dist))
#   }
#   #calculate minimum distance from the control postal code to the treated postal code
#   dfc_2019$min_dist[dfc_2019$INEGI==i] <- min(dist, na.rm=T)
# }

#recombine the dataframes
min_dist <- rbind(dft_2011, dft_2014,
                  dfc_2011, dfc_2014)
rm(dft_2011, dft_2014, dft_2015,
   dfc_2011, dfc_2014, dfc_2015)
gc()

#select only code and min dist
min_dist <- min_dist %>%
  dplyr::select(INEGI, min_dist)

### JOIN min_dist WITH df ###

df <- left_join(df, min_dist)
rm(min_dist)
gc()

#remove municipalities with no min_dist (no partner)
df <- df[!is.na(df$min_dist),]

#### WRITE TO CSV ####

write.csv(df, "Outputs/Analysis all muns.csv", row.names = F, na="")
